from bs4 import BeautifulSoup as BS
import os,xlwt,xlrd,traceback,requests,codecs
from urllib import request
from xlrd import open_workbook
from xlutils.copy import copy

def get_url(url):
    """Fetch a listing index page, print its listing links, and scrape one.

    Every ``<li class="clear">`` element except the final one is inspected;
    the ``href`` of its ``class="img"`` element is printed. The last link
    printed is then passed to ``get_content``.

    Args:
        url: Address of the index page to download.

    Returns:
        None. Returns early, without calling ``get_content``, when no usable
        link was found (the original raised ``UnboundLocalError`` here).
    """
    # Close the HTTP response deterministically and avoid hanging forever.
    with request.urlopen(url, timeout=30) as response:
        page_html = response.read()
    page = BS(page_html, "lxml")

    # Collect the listing entries of the page.
    items = page.find_all("li", attrs={"class": "clear"})

    # The final matching <li> is deliberately left out, as in the original.
    # NOTE(review): presumably that element is not a real listing -- confirm.
    last_link = None
    for item in items[:-1]:
        anchor = item.find(attrs={"class": "img"})
        href = anchor.get("href") if anchor is not None else None
        if not href:
            # Entry without a link element/href: skip it instead of crashing.
            continue
        print(href)
        last_link = href

    if last_link is None:
        return

    # NOTE(review): only the last printed link is scraped, matching the
    # original control flow. If every listing should be scraped, the image
    # file names written by get_content must be made unique per listing first.
    get_content(last_link)
def get_content(url, save_dir="D://tupian"):
    """Scrape one listing page and save its pictures to ``save_dir``.

    Text is read from the ``base`` and ``transaction`` sections inside
    ``div.m-content`` and from the ``title clear`` and ``bd`` elements inside
    ``div.newwrap.shuofang``. Every ``<img>`` inside
    ``div.content-wrapper.housePic`` is downloaded and written as
    ``1.jpg``, ``2.jpg``, ... (numbered by position on the page).

    Args:
        url: Address of the listing page.
        save_dir: Directory that receives the pictures; created if missing.
            Defaults to the directory the original code hard-coded.

    Returns:
        dict with keys ``"base"``, ``"transaction"``, ``"owner_title"`` and
        ``"owner_text"``; a value is ``None`` when that part of the page is
        absent. (Previously these texts were computed and discarded and the
        function returned ``None``; existing callers ignore the result.)
    """
    # Close the HTTP response deterministically and avoid hanging forever.
    with request.urlopen(url, timeout=30) as response:
        html = response.read()
    soup = BS(html, "lxml")

    details = {
        "base": None,
        "transaction": None,
        "owner_title": None,
        "owner_text": None,
    }

    # Basic attributes and transaction attributes.
    main = soup.find("div", attrs={"class": "m-content"})
    if main is not None:
        details["base"] = _descendant_text(main, "base", "ul")
        details["transaction"] = _descendant_text(main, "transaction", "ul")

    # Owner's own description of the listing.
    owner = soup.find("div", attrs={"class": "newwrap shuofang"})
    if owner is not None:
        details["owner_title"] = _descendant_text(owner, "title clear")
        details["owner_text"] = _descendant_text(owner, "bd")

    # Listing pictures.
    gallery = soup.find("div", attrs={"class": "content-wrapper housePic"})
    if gallery is not None:
        _save_images(gallery.find_all("img"), save_dir)

    return details


def _descendant_text(parent, css_class, child_tag=None):
    """Return the text of the ``css_class`` node under ``parent``, or None.

    When ``child_tag`` is given, the text of the first such tag inside the
    matched node is returned instead.
    """
    node = parent.find(attrs={"class": css_class})
    if node is not None and child_tag is not None:
        node = node.find(child_tag)
    return node.text if node is not None else None


def _save_images(img_tags, save_dir):
    """Download each tag's ``src`` into ``save_dir`` as ``<position>.jpg``."""
    # Write with explicit paths instead of os.chdir(), which silently changed
    # the working directory of the whole process.
    os.makedirs(save_dir, exist_ok=True)
    for index, tag in enumerate(img_tags, start=1):
        img_url = tag.get("src")
        if not img_url:
            # <img> without a src attribute: nothing to download.
            continue
        print(img_url)
        try:
            response = requests.get(img_url, timeout=30)
            # Do not store an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print("skipping %s: %s" % (img_url, exc))
            continue
        # The original used str[j], which raises TypeError; str(index) is
        # the intended conversion.
        with open(os.path.join(save_dir, str(index) + ".jpg"), "wb") as wf:
            wf.write(response.content)
    

    

    
   

if __name__ == "__main__":
    # Script entry point: start from the listing index page.
    start_url = "http://sz.lianjia.com/ershoufang/"
    get_url(start_url)
       
        
    